##################################################
# Replication Code
# Taeyong Park and Andrew Reeves
# "Local Unemployment and Voting for President: Uncovering Causal Mechanisms"
# Summary: Data Setup for 2016 Analysis
##################################################


rm(list = ls())
library(foreign)
library(stringr)


#########################
#
# I. CREATE MERGED DATA #
#
#########################

#######################
# 1. Import Data Sets #
#######################


## CCES data (Stata file, read with foreign::read.dta)
data <- read.dta(file = "CCES16_Common_OUTPUT_Feb2018_VV.dta")

## Unemployment data from the Bureau of Labor Statistics website, https://download.bls.gov/pub/time.series/la/la.data.64.County (RECODED by the authors)
## Strings are kept as character vectors rather than converted to factors.
unempDataLAUS <- read.csv(
  file = "LAUS_CountyData_Unemp1615.csv",
  stringsAsFactors = FALSE
)

## Income data from https://www.ers.usda.gov/data-products/county-level-data-sets/download-data/ (RECODED by the authors)
incomeData <- read.csv(
  file = "IncomeData1615.csv",
  stringsAsFactors = FALSE
)


# Unemployment: prefix every FIPS code that consists of exactly four digits
# with "0". The assignment of character values also turns the whole fips
# column into character.
shortFipsRows <- which(str_detect(unempDataLAUS$fips, "^\\d{4}$"))
unempDataLAUS$fips[shortFipsRows] <- paste0("0", unempDataLAUS$fips[shortFipsRows])

# Attach the 24 monthly unemployment columns (Jan-Dec 2016, then Jan-Dec 2015)
# to every respondent by looking up the respondent's county FIPS code in the
# LAUS table.
#
# Source layout used here (by position, as in the LAUS file read above):
#   even columns 2, 4, ..., 24 -> Jan..Dec 2016
#   odd  columns 3, 5, ..., 25 -> Jan..Dec 2015
# Respondents whose county is not in the LAUS table get NA in all 24 columns.
monthTags <- c("Jan", "Feb", "Mar", "Apr", "May", "June",
               "July", "Aug", "Sep", "Oct", "Nov", "Dec")
unempTargets <- c(paste0("unemp", monthTags, "16"),
                  paste0("unemp", monthTags, "15"))
unempSources <- c(seq(2, 24, by = 2), seq(3, 25, by = 2))

# If a FIPS code appears more than once in the LAUS table, keep its last row
# (the same row that a sequential row-by-row fill would end up using).
lausLast <- unempDataLAUS[!duplicated(unempDataLAUS$fips, fromLast = TRUE), ,
                          drop = FALSE]

# One lookup for all respondents instead of rescanning them once per county.
# A missing county code never matches anything.
lausRow <- match(data$countyfips, lausLast$fips)
lausRow[is.na(data$countyfips)] <- NA

for (k in seq_along(unempTargets)) {
  data[[unempTargets[k]]] <- lausLast[[unempSources[k]]][lausRow]
}

# Income: store the FIPS codes as text and prefix four-digit codes with "0"
incomeData$fips <- as.character(incomeData$fips)
incomeShortRows <- which(str_detect(incomeData$fips, "^\\d{4}$"))
incomeData$fips[incomeShortRows] <- paste0("0", incomeData$fips[incomeShortRows])

# Join survey rows to the income table on the county code. merge() is called
# with its default all = FALSE, so only rows whose key appears in both tables
# are kept.
dataMerged <- merge(
  x = data,
  y = incomeData,
  by.x = "countyfips",
  by.y = "fips"
)

write.csv(dataMerged, file = "cces16Merged.csv", row.names = FALSE)

###############################
#
# END - I. CREATE MERGED DATA #
#
###############################



###############################
#
# II. DATA SETUP & IMPUTATION #
#
###############################

rm(list = ls())
library(foreign); library(Amelia); library(stringr)
data <- read.csv("cces16Merged.csv", stringsAsFactors = FALSE)

## County ID
# Number the counties 1..J in order of first appearance of each distinct
# countyfips value. match() does this in one pass; the distinct values are
# computed once instead of once per county.
countyKeys <- unique(data$countyfips)
J <- length(countyKeys)
data$countyID <- match(data$countyfips, countyKeys)
# Rows with a missing county code get no ID.
data$countyID[is.na(data$countyfips)] <- NA
table(data$countyID)

## State ID
# Number the states 1..L in order of first appearance of each distinct
# inputstate value. match() does this in one pass; the distinct values are
# computed once instead of once per state.
stateKeys <- unique(data$inputstate)
L <- length(stateKeys)
data$stateID <- match(data$inputstate, stateKeys)
# Rows with a missing state get no ID.
data$stateID[is.na(data$inputstate)] <- NA
table(data$stateID)


## Presidential vote
table(as.factor(data$CC16_410a)) # vote
# Factor codes are the positions of the responses in sorted level order.
# Code 4 becomes 1, code 1 becomes 0, and every other response stays NA.
voteCode <- as.numeric(as.factor(data$CC16_410a))
data$pvote2 <- c(1, 0)[match(voteCode, c(4, 1))]
table(data$pvote2) # 1 = Clinton; 0 = Trump --> Two party




## Evaluations of national economy

# Retrospective Eval
table(as.factor(data$CC16_302))
# Factor codes 2, 1, 6, 4, 3 are mapped to 1, 2, 3, 4, 5 respectively
# (the position in the vector below is the new value). Code 5 and missing
# responses become NA.
econCode <- as.numeric(as.factor(data$CC16_302))
data$natecon5 <- as.numeric(match(econCode, c(2, 1, 6, 4, 3)))
table(data$natecon5) # 1=much better; ... 5=much worse



## COVARIATES

# Gender
table(as.factor(data$gender))
# Indicator: 1 when the gender factor code is 1, otherwise 0. Missing gender
# is also coded 0 (the data are assumed to have no missing values here).
genderCode <- as.numeric(as.factor(data$gender))
data$female <- as.numeric(genderCode %in% 1)
table(data$female)

# Age
# Age in the survey year, computed from year of birth.
data$age <- 2016 - data$birthyr
table(data$age)

# Race
table(as.factor(data$race))
# Factor code 2 -> 1 (black), code 3 -> 2 (hispanic), all other codes -> 0.
# Missing race stays NA.
raceCode <- as.numeric(as.factor(data$race))
data$raceNew <- ifelse(raceCode == 2, 1,
                       ifelse(raceCode == 3, 2, 0))
table(data$raceNew)

# Employment status
table(as.factor(data$employ))
# Factor code 1 stays 1 (full-time), code 4 -> 2 (part-time),
# codes 8 and 9 -> 3 (unemployed), every other code -> 0.
# Missing employment status stays NA.
employCode <- as.numeric(as.factor(data$employ))
data$employment <- ifelse(
  employCode == 1, 1,
  ifelse(employCode == 4, 2,
         ifelse(employCode %in% c(8, 9), 3, 0))
)
table(data$employment) # 1=Full 2=part; 3=unemployed

# Income
table(data$faminc)
# The factor codes of faminc follow sorted level order, not income order.
# incomeCodeOrder lists the factor codes from the lowest income category (new
# value 1) to the highest (new value 16); the position of a code in this
# vector is its new value. Codes not listed, and missing responses, become NA.
incomeCodeOrder <- c(17, 1, 6, 9, 11, 12, 14, 15, 16, 2, 3, 4, 7, 8, 10, 13)
famincCode <- as.numeric(as.factor(data$faminc))
data$income <- as.numeric(match(famincCode, incomeCodeOrder))
table(data$income)

# Education
table(as.factor(data$educ))
# Factor code 5 -> 0 (post-grad); codes 1 and 6 -> 1 (someCollege);
# all remaining codes keep their factor code. Missing stays NA.
educCode <- as.numeric(as.factor(data$educ))
data$educNew <- ifelse(educCode == 5, 0,
                       ifelse(educCode %in% c(1, 6), 1, educCode))
table(data$educNew) # 1 = someCollege; 2 = fourCollege; 3=HighSchool; 4=noHigh

# Own/Rent
table(as.factor(data$ownhome))
# Indicator: 1 when the factor code is 2, 0 for any other code, NA if missing.
tenureCode <- as.numeric(as.factor(data$ownhome))
data$ownHome <- as.numeric(tenureCode == 2)
table(data$ownHome)

# Party ID
table(as.factor(data$pid3))
# Factor codes 3 and 4 -> 0; code 5 -> 3; codes 1 and 2 are kept as they are.
# Missing stays NA.
partyCode <- as.numeric(as.factor(data$pid3))
data$party3 <- ifelse(partyCode %in% c(3, 4), 0,
                      ifelse(partyCode == 5, 3, partyCode))
table(data$party3) # 1=dem; 2=ind; 3=rep

# Ideology
table(as.factor(data$ideo5))
# Factor codes 6, 2, 3, 1, 5 are mapped to 1..5 (position = new value).
# Code 4 and missing responses become NA.
ideoCode <- as.numeric(as.factor(data$ideo5))
data$ideol <- as.numeric(match(ideoCode, c(6, 2, 3, 1, 5)))
table(data$ideol) # 1=very liberal ... 5=very conservative

# News interest
table(as.factor(data$newsint))
# Factor codes 2, 4, 5, 3 are mapped to 1..4 (position = new value).
# Code 1 and missing responses become NA.
newsCode <- as.numeric(as.factor(data$newsint))
data$newsInt <- as.numeric(match(newsCode, c(2, 4, 5, 3)))
table(data$newsInt) # 1=hardly; 2=only now and then; 3=some; 4=most

### Create a data frame: df ###
# Analysis columns, in this order: county name, outcome and mediator, group
# IDs, monthly unemployment for 2015 and then 2016, the two income columns,
# and the recoded individual-level covariates.
monthLabels <- c("Jan", "Feb", "Mar", "Apr", "May", "June",
                 "July", "Aug", "Sep", "Oct", "Nov", "Dec")
analysisCols <- c(
  "countyname",
  "pvote2", "natecon5", "stateID", "countyID",
  paste0("unemp", monthLabels, "15"),
  paste0("unemp", monthLabels, "16"),
  "inc16", "inc15",
  "age", "female", "raceNew",
  "employment",
  "income", "educNew",
  "ownHome", "party3", "ideol", "newsInt"
)
df <- data[, analysisCols]



# Keep only respondents with a two-party presidential vote. A logical mask is
# used instead of negative which() indices: when no value is missing, which()
# returns integer(0) and df[-integer(0), ] would select zero rows, silently
# discarding the whole data set.
df <- df[!is.na(df$pvote2), ]

# Fix the RNG seed so the imputation is reproducible.
set.seed(123)
# Variables passed to amelia() as ordinal.
ordinalVars <- c(
  "natecon5",
  "stateID", "countyID",
  "age", "female", "raceNew",
  "employment",
  "income", "educNew",
  "ownHome", "party3", "ideol", "newsInt"
)
# A single imputed data set (m = 1); the first column of df (countyname) is
# left out of the imputation model.
data16Imputed <- amelia(df[, -1], m = 1, ords = ordinalVars)
# Write the imputed data in Stata format using the file stem "data16Imputed".
write.amelia(data16Imputed, separate = TRUE, file.stem = "data16Imputed",
             format = "dta")
#####################################
#
# END - II. DATA SETUP & IMPUTATION #
#
#####################################

